#delimit;

** From here on every command, and every comment that starts with an asterisk, ends at a semicolon rather than at the end of the line;

** Do not pause the output and wait for a keypress when it fills the screen;
set more off;

** Close any log left open by an earlier run that stopped on an error - otherwise the log using command below fails because a log is already open;
capture log close;

** Keep a plain-text log of this run, overwriting the log from any previous run;
set logtype text;
log using /home/dnc2101/Accidental_Deaths/Vital_Stats/1981/clean_data_1981.log, replace;

/*  This file cleans the raw Vital Statistics mortality data for the year 1981.                                                                            */
/*  Specifically, it adds variable labels to the data and creates a new, smaller dataset that contains only the variables I considered using               */
/*  for this project and only deaths due to injuries, both unintentional (i.e. accidents) and intentional.                                                 */
/*  This gets the dataset into a format and size such that later years of Vital Stats mortality data can be appended onto it.                              */
/*  Program by Daniel Carvell. Original from Summer 2007, revised in Spring 2010.                                                                          */



** Load the raw 1981 mortality file, turn every string variable that holds only numbers into a numeric variable, ;
** shrink each variable to the smallest storage type that holds its values, and write the result back over the same file;

local raw1981 "/home/dnc2101/Accidental_Deaths/Vital_Stats/1981/mort1981.dta";

use "`raw1981'", clear;

** With no variable list, destring is applied to every variable in the dataset;
destring, replace;
compress;

save "`raw1981'", replace;

** Attach a descriptive label to each geographic and demographic variable in the 1981 file;
** The term "pdf" in the following variable labels refers to the PDF of the Codebook for the data from 1981 - ;
** the page number given is the page reference for further information about the variable;
** The Codebook is available at http://www.nber.org/data/vital-statistics-mortality-data-multiple-cause-of-death.html ;

label variable datayear "year of death";
label variable reparea "reporting area p. 17 of pdf probably useless";
label variable shipno "shipment number p. 17 of pdf useless";
label variable rectype "1 if death in county of residence 2 if not, p. 17 pdf";
label variable restatus "is death in same county, same state, same country of residence p. 18 pdf";
** "Place of Occurrence", p. 18 of pdf, which sounds like it is the city or town in which the person died, does not actually seem to be in the data;
label variable countyoc "county of death, some numerical code detailed in much of codebook";
** pp. 40-74 of pdf give that code for the counties and the metropolitan areas and such;
label variable stateoc "state of death - numeric but not statefip see p. 39 pdf";
label variable region "Census Region of occurence";
label variable divstoc "Census Division and State Subcode of Occurence p. 19 pdf";
label variable exstatoc "expanded state of occurence separate value for NYC see p.39 - useless";
** exstatoc is just like stateoc but with two values for New York - NYC and outside NYC in NYS - not helpful for this project;
label variable countyrs "county of residence, some numerical code detailed in much of codebook";
** pp. 40-74 of pdf give that code for the counties and the metropolitan areas and such;
label variable staters "state of residence - numeric but not statefip see p. 39 pdf";
label variable cityrs "city of residence - numeric code - see p. 21 pdf short clear description";
label variable popsize "population size of city of residence  see p. 21 pdf";
label variable metro "1 if countyrs metropolitan 2 if not Z if foreigner";
** Z's for foreigners in the preceding 4 variables explain why these variables cannot be destrung;
label variable regnres "Census Region of residence 0 if foreign";
label variable divstres "Census Division and State Subcode of Residence p. 23 pdf";
** exstares is the residence counterpart of exstatoc, so its label refers to the state of residence, not the state of occurrence;
label variable exstares "expanded state of residence separate value for NYC see p.39 - useless";
** exstares is just like staters but with two values for New York - NYC and outside NYC in NYS - not helpful for this project;
label variable smsares "Standard Metro Statistical Area of residence p. 25 pdf";
label variable monthdth "month of death, 1 through 12";
label variable daydth "day of death, 1 through 31, 99 if unknown";
label variable sex "gender 1 if male 2 if female";
label variable race "detailed race 9 categories see p. 26 pdf";
label variable racer3 "1 if White, 3 if Black, 2 if Other";
label variable racer2 "1 if White, 2 if Not White";
label variable age "Detailed Age, p 27 pdf - if value above 100 then died before first bday, I think";
label variable ager52 "Age Recode 52, pp. 28-29 pdf gives the categories - numerically coded";
label variable ager27 "Age Recode 27, p. 29 pdf gives the categories - numerically coded";
label variable ager12 "Age Recode 12, p 30 pdf gives the categories - numerically coded";
** NOTE(review) - "p. 20" in the ager22 label below may be a typo for p. 30, given the pages cited for the other age recodes - confirm against the codebook;
label variable ager22 "Age Recode 22, see p. 20 pdf - blank unless infant";
label variable hospstat "Hospital and Status p. 31 pdf USEFUL died in hospital or not, what unit if so";
label variable marstat "Marital Status, p. 31 pdf";
label variable statebth "State of Birth p. 31 pdf, also p. 39 for coding";
label variable autopsy "1 if performed 2 if not 8 or 9 if this unknown, p. 32 pdf";
label variable fipssmsa "Standard Metro Statistical Area again, not sure why p. 32 pdf - Z if Foreign";
** Perhaps fipssmsa is the place of burial or the place of the injury?  Although it will not be in the data often if it is the place of injury, as deaths due to injury are relatively rare;
label variable accident "Place of Accident for Causes E850-E929 see p. 33";

** All subsequent variables in the data concern the underlying cause of death, and I have not labeled them;



** Restrict the data to injury deaths, identified through the Recode 282 of the underlying cause of death (ucr282), ;
** so that only deaths I may need remain and the data shrink to a use-ably small size;
** Observations with ucr282 between 29900 and 35800 inclusive are kept - a missing ucr282 falls outside the range and is dropped;

keep if inrange(ucr282, 29900, 35800);

** Reassuringly, the number of injury deaths left in this dataset is nearly identical to the number of injury deaths in 1981 reported in the ;
** injury death data from the CDC's WISQARS website, which Rubin and Shepherd (2007) use and which I also use;

** Note that this save replaces mort1981.dta on disk with the injury-only subset;

local injury1981 "/home/dnc2101/Accidental_Deaths/Vital_Stats/1981/mort1981.dta";
save "`injury1981'", replace;

** Drop every variable except the ones I expect I may use in the regressions, ;
** so that the resulting dataset can have the datasets for other years appended onto it;

keep
    datayear
    rectype restatus
    countyoc stateoc
    countyrs staters popsize metro
    monthdth
    sex race racer3 racer2
    age ager52 ager27 ager12
    hospstat marstat accident
    ucod ucr282 ucr72 ucr61 ucr34
;

** Build fipsstr, a state FIPS code for the state of residence, from staters - ;
** a statefips variable appears in the data beginning in 1982 but not in 1981;
** I could instead create state-year counts of deaths using stateoc, the state in which the death occurred, but the ;
** WISQARS data that I also use, and that Rubin and Shepherd use, create state-year counts of deaths using the state of residence of the decedent, ;
** so I am using state of residence as well;

** The list below holds the FIPS code assigned to staters values 1, 2, 3, ... , 51 in that order - ;
** the FIPS numbering skips 3, 7, 14, 43 and 52, which is why the two codings drift apart;
local fips_in_staters_order "1 2 4 5 6 8 9 10 11 12 13 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56";

** Any observation whose staters value is not between 1 and 51 keeps a missing fipsstr;
generate fipsstr = .;

local staters_code = 0;
foreach fips of local fips_in_staters_order {;
    local ++staters_code;
    replace fipsstr = `fips' if staters == `staters_code';
};





** Write the trimmed 1981 dataset to the folder of files that are ready for appending, replacing any earlier version, then close the log;

local short1981 "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/short_mort1981.dta";
save "`short1981'", replace;

log close;
